# Helpers for extracting readable plain text from binary document content.
class ConvertToText
  # Byte sequence that seems to occur a few bytes before the text content
  # starts (presumably in WordPerfect .wpd files, judging by the method name).
  # Stored as a binary string: these bytes are not valid UTF-8, and a
  # UTF-8-tagged needle cannot be searched for inside binary content.
  WPD_TEXT_MARKER = "\xDD\xD4\x5F".b.freeze

  # Runs of anything that is not an ASCII word character or one of the
  # punctuation characters ( ) . ? , [ ] : -
  WPD_NON_TEXT = /[^\w()\.\?\,\[\]:\-]+/

  # A lone underscore sitting between two spaces. The lookahead leaves the
  # trailing space unconsumed so that consecutive lone underscores
  # ("a _ _ b") are all removed, not just every other one.
  WPD_LONE_UNDERSCORE = / _(?= )/

  # Extracts a best-effort plain-text rendering from raw document bytes.
  #
  # Everything before the first WPD_TEXT_MARKER is discarded; if the marker is
  # absent the whole input is processed. Non-text bytes are replaced with
  # spaces, lone underscores are dropped (the marker itself ends in "_"), and
  # whitespace is collapsed.
  #
  # The caller's string is never modified.
  #
  # @param content [String, nil] raw file bytes in any encoding; nil is
  #   treated as empty input
  # @return [String] ASCII-only text, tagged as UTF-8
  def self.wpd2txt(content)
    # Work on a binary copy: the input is arbitrary bytes, and byte-wise
    # matching avoids encoding-compatibility and invalid-byte-sequence errors.
    bytes = content.to_s.b

    # Work only with content from the marker onwards (when it is present).
    marker_at = bytes.index(WPD_TEXT_MARKER)
    bytes = bytes[marker_at..-1] if marker_at

    text = bytes.gsub(WPD_NON_TEXT, ' ').gsub(WPD_LONE_UNDERSCORE, '')

    # Only ASCII survives the substitution above, so re-tagging is safe.
    text.squeeze(' ').strip.force_encoding(Encoding::UTF_8)
  end
end
